{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import numpy as np\n",
    "from PIL import Image\n",
    "from sklearn.metrics import classification_report\n",
    "# GridSearchCV lives in sklearn.model_selection (same module as\n",
    "# train_test_split); the old sklearn.grid_search module is deprecated.\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.svm import SVC\n",
    "\n",
    "\n",
    "# Root directory that is walked recursively for character bitmaps.\n",
    "DATA_DIR = 'data/English/Img/GoodImg/Bmp/'\n",
    "# Each image is resized to IMAGE_SIDE x IMAGE_SIDE and flattened into one row.\n",
    "IMAGE_SIDE = 30\n",
    "\n",
    "# X collects one flattened grayscale image per sample, y the matching label.\n",
    "X = []\n",
    "y = []\n",
    "for path, subdirs, files in os.walk(DATA_DIR):\n",
    "    # os.walk yields entries in filesystem order; sorting in place keeps the\n",
    "    # sample order, and therefore any seeded split of X and y, reproducible.\n",
    "    subdirs.sort()\n",
    "    for filename in sorted(files):\n",
    "        # The label is the text between the 3-character filename prefix and\n",
    "        # the first '-'. Skip stray files that do not follow this pattern\n",
    "        # instead of failing with ValueError from str.index.\n",
    "        if '-' not in filename:\n",
    "            continue\n",
    "        f = os.path.join(path, filename)\n",
    "        target = filename[3:filename.index('-')]\n",
    "        # 'L' converts to single-channel 8-bit grayscale before resizing.\n",
    "        img = Image.open(f).convert('L').resize((IMAGE_SIDE, IMAGE_SIDE), resample=Image.LANCZOS)\n",
    "        X.append(np.array(img).reshape(IMAGE_SIDE * IMAGE_SIDE,))\n",
    "        y.append(target)\n",
    "# Scale the 8-bit intensities from [0, 255] to [0, 1]. An RBF kernel computes\n",
    "# exp(-gamma * ||a - b||^2); on raw 0-255 values over 900 pixels the squared\n",
    "# distances are so large that the kernel is ~0 for every gamma >= 0.01 and\n",
    "# the classifier degenerates to predicting a single class.\n",
    "X = np.array(X, dtype=np.float64) / 255.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 3 folds for each of 30 candidates, totalling 90 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=3)]: Done  44 tasks      | elapsed: 34.0min\n",
      "[Parallel(n_jobs=3)]: Done  90 out of  90 | elapsed: 67.7min finished\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best score: 0.073\n",
      "Best parameters set:\n",
      "\tclf__C: 1\n",
      "\tclf__gamma: 0.01\n",
      "             precision    recall  f1-score   support\n",
      "\n",
      "        001       0.00      0.00      0.00         9\n",
      "        002       0.00      0.00      0.00         8\n",
      "        003       0.00      0.00      0.00         7\n",
      "        004       0.00      0.00      0.00         7\n",
      "        005       0.00      0.00      0.00         4\n",
      "        006       0.00      0.00      0.00         9\n",
      "        007       0.00      0.00      0.00         5\n",
      "        008       0.00      0.00      0.00         5\n",
      "        009       0.00      0.00      0.00         2\n",
      "        010       0.00      0.00      0.00         2\n",
      "        011       0.07      1.00      0.13        55\n",
      "        012       0.00      0.00      0.00        17\n",
      "        013       0.00      0.00      0.00        20\n",
      "        014       0.00      0.00      0.00        20\n",
      "        015       0.00      0.00      0.00        46\n",
      "        016       0.00      0.00      0.00         9\n",
      "        017       0.00      0.00      0.00        10\n",
      "        018       0.00      0.00      0.00        12\n",
      "        019       0.00      0.00      0.00        31\n",
      "        020       0.00      0.00      0.00        11\n",
      "        021       0.00      0.00      0.00        12\n",
      "        022       0.00      0.00      0.00        15\n",
      "        023       0.00      0.00      0.00        12\n",
      "        024       0.00      0.00      0.00        37\n",
      "        025       0.00      0.00      0.00        46\n",
      "        026       0.00      0.00      0.00        13\n",
      "        027       0.00      0.00      0.00         5\n",
      "        028       0.00      0.00      0.00        33\n",
      "        029       0.00      0.00      0.00        40\n",
      "        030       0.00      0.00      0.00        29\n",
      "        031       0.00      0.00      0.00         9\n",
      "        032       0.00      0.00      0.00         7\n",
      "        033       0.00      0.00      0.00         4\n",
      "        034       0.00      0.00      0.00         7\n",
      "        035       0.00      0.00      0.00        10\n",
      "        036       0.00      0.00      0.00         5\n",
      "        037       0.00      0.00      0.00        15\n",
      "        038       0.00      0.00      0.00         3\n",
      "        039       0.00      0.00      0.00         8\n",
      "        040       0.00      0.00      0.00         3\n",
      "        041       0.00      0.00      0.00        26\n",
      "        042       0.00      0.00      0.00         3\n",
      "        043       0.00      0.00      0.00         3\n",
      "        044       0.00      0.00      0.00         4\n",
      "        045       0.00      0.00      0.00        10\n",
      "        046       0.00      0.00      0.00         4\n",
      "        047       0.00      0.00      0.00         5\n",
      "        048       0.00      0.00      0.00         2\n",
      "        049       0.00      0.00      0.00         6\n",
      "        050       0.00      0.00      0.00        19\n",
      "        051       0.00      0.00      0.00        18\n",
      "        052       0.00      0.00      0.00         4\n",
      "        053       0.00      0.00      0.00         4\n",
      "        054       1.00      0.10      0.18        10\n",
      "        055       0.00      0.00      0.00        11\n",
      "        056       0.00      0.00      0.00        10\n",
      "        057       0.00      0.00      0.00         6\n",
      "        058       0.00      0.00      0.00         2\n",
      "        059       0.00      0.00      0.00         6\n",
      "        060       0.00      0.00      0.00         7\n",
      "        061       0.00      0.00      0.00         6\n",
      "        062       0.00      0.00      0.00         3\n",
      "\n",
      "avg / total       0.02      0.07      0.01       771\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/gavin/classpass-activity-tagger/venv/local/lib/python2.7/site-packages/sklearn/metrics/classification.py:1113: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n"
     ]
    }
   ],
   "source": [
    "# Hold out 10% of the samples for the final report; the seed fixes the split.\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=0.1, random_state=11\n",
    ")\n",
    "\n",
    "# Single-step pipeline. The gamma and C given here are only starting values:\n",
    "# the grid search below sets both for every candidate it fits.\n",
    "pipeline = Pipeline(steps=[('clf', SVC(C=100, gamma=0.01, kernel='rbf'))])\n",
    "\n",
    "# 5 gamma values x 6 C values = 30 candidates, addressed via the 'clf' step.\n",
    "parameters = dict(\n",
    "    clf__gamma=(0.01, 0.03, 0.1, 0.3, 1),\n",
    "    clf__C=(0.1, 0.3, 1, 3, 10, 30),\n",
    ")\n",
    "\n",
    "if __name__ == '__main__':\n",
    "    # Exhaustive search over the grid, scored by accuracy, on 3 workers.\n",
    "    grid_search = GridSearchCV(\n",
    "        estimator=pipeline,\n",
    "        param_grid=parameters,\n",
    "        scoring='accuracy',\n",
    "        n_jobs=3,\n",
    "        verbose=1\n",
    "    )\n",
    "    grid_search.fit(X_train, y_train)\n",
    "\n",
    "    # Report the best cross-validated score and the parameters that won.\n",
    "    print('Best score: %0.3f' % grid_search.best_score_)\n",
    "    print('Best parameters set:')\n",
    "    best_parameters = grid_search.best_estimator_.get_params()\n",
    "    for param_name in sorted(parameters):\n",
    "        print('\\t%s: %r' % (param_name, best_parameters[param_name]))\n",
    "\n",
    "    # Evaluate the best estimator on the held-out samples, per class.\n",
    "    predictions = grid_search.predict(X_test)\n",
    "    print(classification_report(y_test, predictions))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
